#include <cuda.h>
#include <cuda_runtime.h>
#include "cuda/cuda_arch.cuh"

// Compile-time warps-per-SM constant, selected from __CUDA_ARCH__.
// When __CUDA_ARCH__ is not defined (e.g. nvcc's host compilation pass) the
// preprocessor evaluates it as 0, so the lower-architecture value is chosen.
#if __CUDA_ARCH__ >= 750
// sm_75 and newer
#define ARCH_WARPS_PER_SM 4

#else
// anything below sm_75 (e.g. sm_70)
#define ARCH_WARPS_PER_SM 2

#endif

// Multiprocessor count of CUDA device 0, written by init_arch().
// As a static it is zero-initialized before init_arch() runs.
static int _arch_sm;

/**
 * Queries the properties of CUDA device 0 and caches its multiprocessor
 * count in _arch_sm.
 *
 * If the property query fails (for example, no usable device), _arch_sm is
 * set to 0 rather than being assigned from an unfilled cudaDeviceProp.
 */
void init_arch()
{
    cudaDeviceProp p;
    // cudaGetDeviceProperties is the CUDA runtime call for this query; its
    // status must be checked because `p` is only valid on cudaSuccess.
    const cudaError_t status = cudaGetDeviceProperties(&p, 0);
    _arch_sm = (status == cudaSuccess) ? p.multiProcessorCount : 0;
}

// Returns the compile-time value of the ARCH_WARPS_PER_SM macro.
constexpr int arch_warps_per_sm()
{
    return (ARCH_WARPS_PER_SM);
}